import polars as pl
import polars.selectors as cs
import seaborn as sbn
import matplotlib.pyplot as plt
# Make the graphs a bit prettier, and bigger
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 5)
print(pl.__version__)1.6.0
Let’s continue with our NYC 311 service requests example.
# because of mixed types we specify dtype to prevent any errors
complaints = pl.read_csv('../data/311-service-requests.csv', schema_overrides={'Incident Zip':pl.String})3.1 Selecting only noise complaints
I’d like to know which borough has the most noise complaints. First, we’ll take a look at the data to see what it looks like:
complaints.head()| Unique Key | Created Date | Closed Date | Agency | Agency Name | Complaint Type | Descriptor | Location Type | Incident Zip | Incident Address | Street Name | Cross Street 1 | Cross Street 2 | Intersection Street 1 | Intersection Street 2 | Address Type | City | Landmark | Facility Type | Status | Due Date | Resolution Action Updated Date | Community Board | Borough | X Coordinate (State Plane) | Y Coordinate (State Plane) | Park Facility Name | Park Borough | School Name | School Number | School Region | School Code | School Phone Number | School Address | School City | School State | School Zip | School Not Found | School or Citywide Complaint | Vehicle Type | Taxi Company Borough | Taxi Pick Up Location | Bridge Highway Name | Bridge Highway Direction | Road Ramp | Bridge Highway Segment | Garage Lot Name | Ferry Direction | Ferry Terminal Name | Latitude | Longitude | Location |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| i64 | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | i64 | i64 | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | f64 | f64 | str |
| 26589651 | "10/31/2013 02:08:41 AM" | null | "NYPD" | "New York City Police Departmen… | "Noise - Street/Sidewalk" | "Loud Talking" | "Street/Sidewalk" | "11432" | "90-03 169 STREET" | "169 STREET" | "90 AVENUE" | "91 AVENUE" | null | null | "ADDRESS" | "JAMAICA" | null | "Precinct" | "Assigned" | "10/31/2013 10:08:41 AM" | "10/31/2013 02:35:17 AM" | "12 QUEENS" | "QUEENS" | 1042027 | 197389 | "Unspecified" | "QUEENS" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.708275 | -73.791604 | "(40.70827532593202, -73.791603… |
| 26593698 | "10/31/2013 02:01:04 AM" | null | "NYPD" | "New York City Police Departmen… | "Illegal Parking" | "Commercial Overnight Parking" | "Street/Sidewalk" | "11378" | "58 AVENUE" | "58 AVENUE" | "58 PLACE" | "59 STREET" | null | null | "BLOCKFACE" | "MASPETH" | null | "Precinct" | "Open" | "10/31/2013 10:01:04 AM" | null | "05 QUEENS" | "QUEENS" | 1009349 | 201984 | "Unspecified" | "QUEENS" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.721041 | -73.909453 | "(40.721040535628305, -73.90945… |
| 26594139 | "10/31/2013 02:00:24 AM" | "10/31/2013 02:40:32 AM" | "NYPD" | "New York City Police Departmen… | "Noise - Commercial" | "Loud Music/Party" | "Club/Bar/Restaurant" | "10032" | "4060 BROADWAY" | "BROADWAY" | "WEST 171 STREET" | "WEST 172 STREET" | null | null | "ADDRESS" | "NEW YORK" | null | "Precinct" | "Closed" | "10/31/2013 10:00:24 AM" | "10/31/2013 02:39:42 AM" | "12 MANHATTAN" | "MANHATTAN" | 1001088 | 246531 | "Unspecified" | "MANHATTAN" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.84333 | -73.939144 | "(40.84332975466513, -73.939143… |
| 26595721 | "10/31/2013 01:56:23 AM" | "10/31/2013 02:21:48 AM" | "NYPD" | "New York City Police Departmen… | "Noise - Vehicle" | "Car/Truck Horn" | "Street/Sidewalk" | "10023" | "WEST 72 STREET" | "WEST 72 STREET" | "COLUMBUS AVENUE" | "AMSTERDAM AVENUE" | null | null | "BLOCKFACE" | "NEW YORK" | null | "Precinct" | "Closed" | "10/31/2013 09:56:23 AM" | "10/31/2013 02:21:10 AM" | "07 MANHATTAN" | "MANHATTAN" | 989730 | 222727 | "Unspecified" | "MANHATTAN" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.778009 | -73.980213 | "(40.7780087446372, -73.9802134… |
| 26590930 | "10/31/2013 01:53:44 AM" | null | "DOHMH" | "Department of Health and Menta… | "Rodent" | "Condition Attracting Rodents" | "Vacant Lot" | "10027" | "WEST 124 STREET" | "WEST 124 STREET" | "LENOX AVENUE" | "ADAM CLAYTON POWELL JR BOULEVA… | null | null | "BLOCKFACE" | "NEW YORK" | null | "N/A" | "Pending" | "11/30/2013 01:53:44 AM" | "10/31/2013 01:59:54 AM" | "10 MANHATTAN" | "MANHATTAN" | 998815 | 233545 | "Unspecified" | "MANHATTAN" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.807691 | -73.947387 | "(40.80769092704951, -73.947387… |
To get the noise complaints, we need to find the rows where the “Complaint Type” column is “Noise - Street/Sidewalk”. I’ll show you how to do that, and then explain what’s going on.
noise_complaints = complaints.filter(pl.col('Complaint Type') == "Noise - Street/Sidewalk")
noise_complaints.head(3)| Unique Key | Created Date | Closed Date | Agency | Agency Name | Complaint Type | Descriptor | Location Type | Incident Zip | Incident Address | Street Name | Cross Street 1 | Cross Street 2 | Intersection Street 1 | Intersection Street 2 | Address Type | City | Landmark | Facility Type | Status | Due Date | Resolution Action Updated Date | Community Board | Borough | X Coordinate (State Plane) | Y Coordinate (State Plane) | Park Facility Name | Park Borough | School Name | School Number | School Region | School Code | School Phone Number | School Address | School City | School State | School Zip | School Not Found | School or Citywide Complaint | Vehicle Type | Taxi Company Borough | Taxi Pick Up Location | Bridge Highway Name | Bridge Highway Direction | Road Ramp | Bridge Highway Segment | Garage Lot Name | Ferry Direction | Ferry Terminal Name | Latitude | Longitude | Location |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| i64 | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | i64 | i64 | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | f64 | f64 | str |
| 26589651 | "10/31/2013 02:08:41 AM" | null | "NYPD" | "New York City Police Departmen… | "Noise - Street/Sidewalk" | "Loud Talking" | "Street/Sidewalk" | "11432" | "90-03 169 STREET" | "169 STREET" | "90 AVENUE" | "91 AVENUE" | null | null | "ADDRESS" | "JAMAICA" | null | "Precinct" | "Assigned" | "10/31/2013 10:08:41 AM" | "10/31/2013 02:35:17 AM" | "12 QUEENS" | "QUEENS" | 1042027 | 197389 | "Unspecified" | "QUEENS" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.708275 | -73.791604 | "(40.70827532593202, -73.791603… |
| 26594086 | "10/31/2013 12:54:03 AM" | "10/31/2013 02:16:39 AM" | "NYPD" | "New York City Police Departmen… | "Noise - Street/Sidewalk" | "Loud Music/Party" | "Street/Sidewalk" | "10310" | "173 CAMPBELL AVENUE" | "CAMPBELL AVENUE" | "HENDERSON AVENUE" | "WINEGAR LANE" | null | null | "ADDRESS" | "STATEN ISLAND" | null | "Precinct" | "Closed" | "10/31/2013 08:54:03 AM" | "10/31/2013 02:07:14 AM" | "01 STATEN ISLAND" | "STATEN ISLAND" | 952013 | 171076 | "Unspecified" | "STATEN ISLAND" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.636182 | -74.11615 | "(40.63618202176914, -74.116150… |
| 26591573 | "10/31/2013 12:35:18 AM" | "10/31/2013 02:41:35 AM" | "NYPD" | "New York City Police Departmen… | "Noise - Street/Sidewalk" | "Loud Talking" | "Street/Sidewalk" | "10312" | "24 PRINCETON LANE" | "PRINCETON LANE" | "HAMPTON GREEN" | "DEAD END" | null | null | "ADDRESS" | "STATEN ISLAND" | null | "Precinct" | "Closed" | "10/31/2013 08:35:18 AM" | "10/31/2013 01:45:17 AM" | "03 STATEN ISLAND" | "STATEN ISLAND" | 929577 | 140964 | "Unspecified" | "STATEN ISLAND" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.553421 | -74.196743 | "(40.55342078716953, -74.196743… |
If you look at noise_complaints, you’ll see that this worked, and it only contains complaints with the right complaint type. But how does this work? Let’s deconstruct it into two pieces
pl.col('Complaint Type') == "Noise - Street/Sidewalk"[(col(“Complaint Type”)) == (String(Noise - Street/Sidewalk))]
This is a polars expression, which represents a transformation of Series. In this case, this expression represents a mapping from ‘Complaint Type’ Series (a str) to a boolean Series based on the predecate. The “filter” function will take in any expression which evaluates to a boolean Series.
You can also store and combine more than one expression with the & operator like this:
is_noise = pl.col('Complaint Type') == "Noise - Street/Sidewalk"
in_brooklyn = pl.col('Borough') == "BROOKLYN"
complaints.filter(is_noise & in_brooklyn).head()| Unique Key | Created Date | Closed Date | Agency | Agency Name | Complaint Type | Descriptor | Location Type | Incident Zip | Incident Address | Street Name | Cross Street 1 | Cross Street 2 | Intersection Street 1 | Intersection Street 2 | Address Type | City | Landmark | Facility Type | Status | Due Date | Resolution Action Updated Date | Community Board | Borough | X Coordinate (State Plane) | Y Coordinate (State Plane) | Park Facility Name | Park Borough | School Name | School Number | School Region | School Code | School Phone Number | School Address | School City | School State | School Zip | School Not Found | School or Citywide Complaint | Vehicle Type | Taxi Company Borough | Taxi Pick Up Location | Bridge Highway Name | Bridge Highway Direction | Road Ramp | Bridge Highway Segment | Garage Lot Name | Ferry Direction | Ferry Terminal Name | Latitude | Longitude | Location |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| i64 | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | i64 | i64 | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | str | f64 | f64 | str |
| 26595564 | "10/31/2013 12:30:36 AM" | null | "NYPD" | "New York City Police Departmen… | "Noise - Street/Sidewalk" | "Loud Music/Party" | "Street/Sidewalk" | "11236" | "AVENUE J" | "AVENUE J" | "EAST 80 STREET" | "EAST 81 STREET" | null | null | "BLOCKFACE" | "BROOKLYN" | null | "Precinct" | "Open" | "10/31/2013 08:30:36 AM" | null | "18 BROOKLYN" | "BROOKLYN" | 1008937 | 170310 | "Unspecified" | "BROOKLYN" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.634104 | -73.911055 | "(40.634103775951736, -73.91105… |
| 26595553 | "10/31/2013 12:05:10 AM" | "10/31/2013 02:43:43 AM" | "NYPD" | "New York City Police Departmen… | "Noise - Street/Sidewalk" | "Loud Talking" | "Street/Sidewalk" | "11225" | "25 LEFFERTS AVENUE" | "LEFFERTS AVENUE" | "WASHINGTON AVENUE" | "BEDFORD AVENUE" | null | null | "ADDRESS" | "BROOKLYN" | null | "Precinct" | "Closed" | "10/31/2013 08:05:10 AM" | "10/31/2013 01:29:29 AM" | "09 BROOKLYN" | "BROOKLYN" | 995366 | 180388 | "Unspecified" | "BROOKLYN" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.661793 | -73.959934 | "(40.6617931276793, -73.9599336… |
| 26594653 | "10/30/2013 11:26:32 PM" | "10/31/2013 12:18:54 AM" | "NYPD" | "New York City Police Departmen… | "Noise - Street/Sidewalk" | "Loud Music/Party" | "Street/Sidewalk" | "11222" | null | null | null | null | "DOBBIN STREET" | "NORMAN STREET" | "INTERSECTION" | "BROOKLYN" | null | "Precinct" | "Closed" | "10/31/2013 07:26:32 AM" | "10/31/2013 12:18:54 AM" | "01 BROOKLYN" | "BROOKLYN" | 996925 | 203271 | "Unspecified" | "BROOKLYN" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.7246 | -73.954271 | "(40.724599563793525, -73.95427… |
| 26591992 | "10/30/2013 10:02:58 PM" | "10/30/2013 10:23:20 PM" | "NYPD" | "New York City Police Departmen… | "Noise - Street/Sidewalk" | "Loud Talking" | "Street/Sidewalk" | "11218" | "DITMAS AVENUE" | "DITMAS AVENUE" | null | null | null | null | "LATLONG" | "BROOKLYN" | null | "Precinct" | "Closed" | "10/31/2013 06:02:58 AM" | "10/30/2013 10:23:20 PM" | "01 BROOKLYN" | "BROOKLYN" | 991895 | 171051 | "Unspecified" | "BROOKLYN" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.636169 | -73.972455 | "(40.63616876563881, -73.972455… |
| 26594167 | "10/30/2013 08:38:25 PM" | "10/30/2013 10:26:28 PM" | "NYPD" | "New York City Police Departmen… | "Noise - Street/Sidewalk" | "Loud Music/Party" | "Street/Sidewalk" | "11218" | "126 BEVERLY ROAD" | "BEVERLY ROAD" | "CHURCH AVENUE" | "EAST 2 STREET" | null | null | "ADDRESS" | "BROOKLYN" | null | "Precinct" | "Closed" | "10/31/2013 04:38:25 AM" | "10/30/2013 10:26:28 PM" | "12 BROOKLYN" | "BROOKLYN" | 990144 | 173511 | "Unspecified" | "BROOKLYN" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "Unspecified" | "N" | null | null | null | null | null | null | null | null | null | null | null | 40.642922 | -73.978762 | "(40.6429222774404, -73.9787617… |
Or if we just wanted a few columns:
complaints.filter(is_noise & in_brooklyn).select('Complaint Type', 'Borough', 'Created Date', 'Descriptor').head(10)| Complaint Type | Borough | Created Date | Descriptor |
|---|---|---|---|
| str | str | str | str |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/31/2013 12:30:36 AM" | "Loud Music/Party" |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/31/2013 12:05:10 AM" | "Loud Talking" |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/30/2013 11:26:32 PM" | "Loud Music/Party" |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/30/2013 10:02:58 PM" | "Loud Talking" |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/30/2013 08:38:25 PM" | "Loud Music/Party" |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/30/2013 08:32:13 PM" | "Loud Talking" |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/30/2013 06:07:39 PM" | "Loud Music/Party" |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/30/2013 03:04:51 PM" | "Loud Talking" |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/29/2013 10:07:02 PM" | "Loud Talking" |
| "Noise - Street/Sidewalk" | "BROOKLYN" | "10/29/2013 08:15:59 PM" | "Loud Music/Party" |
3.2 So, which borough has the most noise complaints?
noise_complaints = complaints.filter(pl.col('Complaint Type') == "Noise - Street/Sidewalk")
noise_complaints['Borough'].value_counts(sort=True)| Borough | count |
|---|---|
| str | u32 |
| "MANHATTAN" | 917 |
| "BROOKLYN" | 456 |
| "BRONX" | 292 |
| "QUEENS" | 226 |
| "STATEN ISLAND" | 36 |
| "Unspecified" | 1 |
It’s Manhattan! But Manhattan probably has a lot of complaints in total. Maybe it’s better to get the percentage of all complaints that are noise complaints? That would be easy too with the group_by method:
complaint_avgs = (
complaints
.group_by("Borough")
.agg(noise_complaint_avg=(pl.col('Complaint Type') == "Noise - Street/Sidewalk").mean())
.sort('noise_complaint_avg', descending=True)
)
complaint_avgs| Borough | noise_complaint_avg |
|---|---|
| str | f64 |
| "MANHATTAN" | 0.037755 |
| "BRONX" | 0.014833 |
| "BROOKLYN" | 0.013864 |
| "QUEENS" | 0.010143 |
| "STATEN ISLAND" | 0.007474 |
| "Unspecified" | 0.000141 |
It looks like noise complaints make up about 3.7% of all complaints in Manhattan. Which isn’t a lot, but it’s still leading amongst all boroughs.
sbn.barplot(complaint_avgs, x='Borough', y='noise_complaint_avg')<Axes: xlabel='Borough', ylabel='noise_complaint_avg'>
So Manhattan really does complain more about noise than the other boroughs! Neat.
_files/Chapter 3 - Which borough has the most noise complaints (or%2C more selecting data)_19_1.png)